# package
require("readr")
require("dplyr")
require("tidyr")
require("ggplot2")
require("nnet")
require("effects")
theme_set(theme_minimal(base_size = 12, base_family = 'Calibri'))


# load
UTAS2020 <- read_csv("http://www.masaki.j.u-tokyo.ac.jp/utas/2020UTASV20210319.csv", 
                     locale = locale(encoding = "CP932"))


# explanatory variable
## term based arrival date
UTAS2020$Term <- as.character(UTAS2020$Term)
table(UTAS2020$Term, useNA = "always")


# outcome variables
## whether select economic / employment measure as the most pressing issue
UTAS2020$Q30[UTAS2020$Q30 %in% c(8, 99)] <- NA
UTAS2020$Economic <- ifelse(UTAS2020$Q30 == 2, 1, 0)
table(UTAS2020$Economic, useNA = "always")

## the most pressing issue
table(UTAS2020$Q30, useNA = "always")
prop.table(table(UTAS2020$Q30[UTAS2020$Term == "1" & !is.na(UTAS2020$Q30)], useNA = "always"))


# control variables
## female
UTAS2020$Q37[UTAS2020$Q37 == 99] <- NA
UTAS2020$Female <- UTAS2020$Q37 - 1
table(UTAS2020$Female, useNA = "always")

## age
UTAS2020$Q38[UTAS2020$Q38 == 99] <- NA
UTAS2020$Age <- UTAS2020$Q38
table(UTAS2020$Age, useNA = "always")

## education
UTAS2020$Q41[UTAS2020$Q41 == 7] <- NA
UTAS2020$Education <- UTAS2020$Q41
table(UTAS2020$Education, useNA = "always")

## self employed
UTAS2020$SelfEmployed <- ifelse(UTAS2020$Q39_1 == 3, 1, 0)
UTAS2020$SelfEmployed[UTAS2020$Q39_1 == 99] <- NA
table(UTAS2020$SelfEmployed, useNA = "always")

## unemployed
UTAS2020$Unemployed <- ifelse(UTAS2020$Q39_1 == 6, 1, 0)
UTAS2020$Unemployed[UTAS2020$Q39_1 == 99] <- NA
table(UTAS2020$Unemployed, useNA = "always")

## standard of living
UTAS2020$Q44[UTAS2020$Q44 == 99] <- NA
UTAS2020$Living <- UTAS2020$Q44
table(UTAS2020$Living, useNA = "always")

## partisanship
UTAS2020$Partisanship <- NA
UTAS2020$Partisanship[UTAS2020$Q34 == 1] <- "LDP"
UTAS2020$Partisanship[UTAS2020$Q34 == 2] <- "CDP"
UTAS2020$Partisanship[UTAS2020$Q34 == 3] <- "NDP"
UTAS2020$Partisanship[UTAS2020$Q34 == 4] <- "Komei"
UTAS2020$Partisanship[UTAS2020$Q34 == 5] <- "JCP"
UTAS2020$Partisanship[UTAS2020$Q34 == 6] <- "JIP"
UTAS2020$Partisanship[UTAS2020$Q34 == 7] <- "SDP"
UTAS2020$Partisanship[UTAS2020$Q34 == 9] <- "NHK"
UTAS2020$Partisanship[UTAS2020$Q34 == 10] <- "Reiwa"
UTAS2020$Partisanship[UTAS2020$Q34 == 11] <- "Others"
UTAS2020$Partisanship[UTAS2020$Q34 == 12] <- "Independent"
UTAS2020$Partisanship <- factor(UTAS2020$Partisanship, 
                                levels = c("LDP", "CDP", "NDP", "Komei", "JCP", "JIP", 
                                           "SDP", "NHK", "Reiwa", "Others", "Independent"))
table(UTAS2020$Partisanship, useNA = "always")

## residential area
UTAS2020$PR_Area <- NA
UTAS2020$PR_Area[UTAS2020$PR == 1] <- "Hokkaido"
UTAS2020$PR_Area[UTAS2020$PR == 2] <- "Tohoku"
UTAS2020$PR_Area[UTAS2020$PR == 3] <- "NorthKanto"
UTAS2020$PR_Area[UTAS2020$PR == 4] <- "SouthKanto"
UTAS2020$PR_Area[UTAS2020$PR == 5] <- "Tokyo"
UTAS2020$PR_Area[UTAS2020$PR == 6] <- "Hokuriku"
UTAS2020$PR_Area[UTAS2020$PR == 7] <- "Tokai"
UTAS2020$PR_Area[UTAS2020$PR == 8] <- "Kinki"
UTAS2020$PR_Area[UTAS2020$PR == 9] <- "Chugoku"
UTAS2020$PR_Area[UTAS2020$PR == 10] <- "Shikoku"
UTAS2020$PR_Area[UTAS2020$PR == 11] <- "Kyushu"
UTAS2020$PR_Area <- factor(UTAS2020$PR_Area, 
                           levels = c("Hokkaido", "Tohoku", "NorthKanto", "SouthKanto", 
                                      "Tokyo", "Hokuriku", "Tokai", "Kinki", "Chugoku", 
                                      "Shikoku", "Kyushu"))
table(UTAS2020$PR_Area, useNA = "always")


# logistic regression
res1_1 <- glm(Economic ~ Term + Female + Age + Education + 
                SelfEmployed + Unemployed + Living + Partisanship + PR_Area, 
              data = UTAS2020, family = binomial(link = "logit"))
summary(res1_1)


# multinomial logistic regression
## estimate
res1_2 <- multinom(Q30 ~ Term + Female + Age + Education + 
                     SelfEmployed + Unemployed + Living + Partisanship + PR_Area, 
                   data = UTAS2020)
summary(res1_2)

## predict
predict_term <- Effect("Term", res1_2, 
                       xlevels = list(Term = 1:4), 
                       given.values = c(Female = 1, Age = mean(UTAS2020$Age, na.rm = T), 
                                        Education = 5, SelfEmployed = 0, Unemployed = 0, 
                                        Living = mean(UTAS2020$Living, na.rm = T), 
                                        Partisanship = 12, PR_Area = 5))
predict_term <- cbind(predict_term$x, 
                      predict_term$prob, 
                      predict_term$se.prob) %>% 
  gather(key = "Class", value = "Rate", -Term)
predict_term$Class <- str_replace_all(predict_term$Class, 
                                      pattern = "se.prob", 
                                      replacement = "SE")
predict_term$Class <- str_replace_all(predict_term$Class, 
                                      pattern = "prob", 
                                      replacement = "Prob")
predict_term <- separate(predict_term, col = Class, sep = "\\.", 
                         into = c("Estimate", "Class"))
predict_term$Class <- recode(predict_term$Class, 
                             `X1` = "Foreign policy", 
                             `X2` = "Economic / employment measures", 
                             `X3` = "Fiscal reconstruction", 
                             `X4` = "Pension / medical and nursing care", 
                             `X5` = "Education", 
                             `X6` = "Nuclear energy policy", 
                             `X7` = "Constitution")
predict_term$Class <- factor(predict_term$Class, 
                             levels = c("Foreign policy", 
                                        "Economic / employment measures", 
                                        "Fiscal reconstruction", 
                                        "Pension / medical and nursing care", 
                                        "Education", 
                                        "Nuclear energy policy", 
                                        "Constitution"))
predict_term <- spread(predict_term, key = "Estimate", value = "Rate")

## plot
predict_term_plot <- ggplot(predict_term, 
                            aes(x = Term, y = Prob, 
                                ymax = Prob + 1.96*SE, 
                                ymin = Prob - 1.96*SE)) + 
  geom_pointrange() + facet_wrap(~Class, ncol = 4) + 
  labs(x = "Term", y = "Predicted Probability") + 
  theme(strip.text.x = element_text(size = 11))
plot(predict_term_plot)
